# Description ------------------------------------------------------------------
#   - Analysis of sample of all PG&E and SoCalGas zip codes.
#   - Follows buildPanelAllZips.R.

# Setup ------------------------------------------------------------------------
  # Options
  options(stringsAsFactors = FALSE)
  # Packages
  library(pacman)
  p_load(data.table, stringr, lubridate, lfe, magrittr)
  # Set the number of threads for the lfe package
  # options(lfe.threads = 32)
  # Directories of joined bill-weather files
  dir_project <- "S:/Edward/Elasticity/"
  # dir_project <- "/Users/edwardarubin/Dropbox/Research/MyProjects/NaturalGas/"
  dir_csv     <- paste0(dir_project, "DataCsv/")
  dir_rds     <- paste0(dir_project, "DataR/")
  dir_sample  <- paste0(dir_rds, "Prices/SampledSubsets/")
  dir_figures <- paste0(dir_project, "Figures/")
  dir_output  <- paste0(dir_project, "Output/")
  dir_results <- paste0(dir_rds, "Results/Results20171024/JointCA/")

# My felm function -------------------------------------------------------------
  full_summary <- function(object, name) {
# HACK: Try the variout lags that might show up. Not ideal.
    if ("stage1" %in% names(object)) {
      # Calculate F stat in first stage
      try(f_stat <- waldtest(object$stage1,
        ~ spot_price_1week | `spot_price_1week:utilitysocal`,
        type = "cluster")[["F"]], silent = T)
      try(f_stat <- waldtest(object$stage1,
        ~ spot_price_1week_lag1 | `spot_price_1week_lag1:utilitysocal`,
        type = "cluster")[["F"]], silent = T)
      try(f_stat <- waldtest(object$stage1,
        ~ spot_price_1week_lag2 | `spot_price_1week_lag2:utilitysocal`,
        type = "cluster")[["F"]], silent = T)
      try(f_stat <- waldtest(object$stage1,
        ~ spot_price_1week_lag3 | `spot_price_1week_lag3:utilitysocal`,
        type = "cluster")[["F"]], silent = T)
      # Create the first-stage list
      list1 <- list(
        coef = object$stage1 %>% summary() %>% coef(),
        fe   = object$stage1$fe %>% names(),
        N    = object$stage1$N,
        k    = object$stage1$p,
        call = object$call,
        f    = f_stat,
        lhs  = object$stage1$lhs)
    } else {
      list1 <- NULL
    }
    # Create the second-stage list
    list2 <- list(
      coef = object %>% summary() %>% coef(),
      fe   = object$fe %>% names(),
      N    = object$N,
      k    = object$p,
      call = object$call,
      lhs  = object$lhs)
    # Save
    saveRDS(list(list1, list2),
      paste0(dir_results, "jointCA__", name, ".rds"))
  }

# Function to run regressions (especially 2SLS) --------------------------------
  reg_run <- function(var_lhs, var_exog, var_endo,
    fe_hh, fe_space, fe_time, var_iv, var_cl, lag, care_sub, season_sub) {
      # Function that replaces underscored names with camel case
      # Also: remove math
      to_camel <- . %>% str_replace_all("_", " ") %>%
        str_to_title() %>%
        str_replace_all(" ", "") %>%
        str_replace("[I][(]", "") %>%
        str_replace_all("[()/]", "")
      # Create the lag suffix
      l <- ifelse(lag > 0, paste0("_lag", lag), "")
      # Add lags to endogenous variables IVs
      if (!is.null(var_endo)) {
        var_endo_l <- ifelse(lag <= 0,
          yes = var_endo,
          no = ifelse(str_sub(var_endo, -1) != ")",
            yes = paste0(var_endo, l),
            no = paste0(str_sub(var_endo, 1, -2), l, ")")))
        var_iv_l <- ifelse(lag <= 0,
          yes = var_iv,
          no = ifelse(str_sub(var_iv, -1) != ")",
            yes = paste0(var_iv, l),
            no = paste0(str_sub(var_iv, 1, -2), l, ")")))
      }
      # Create the regression formula
      eqn <- paste0(
        var_lhs, " ~ ",
        # "Exogenous" variables
        paste(var_exog, collapse = " + "), " | ",
        # Fixed effects
        fe_hh, " + ",
        ifelse(fe_space == "", fe_time, paste0(fe_space, "_", fe_time)), " | ",
        # Instruments
        ifelse(is.null(var_endo),
          " 0 ",
          paste0("( ", var_endo_l, " ~ ",
            var_iv_l, " + ",
            var_iv_l, ":utility)")),
          " | ",
        # Clustering
        paste(var_cl, collapse = " + ")
        ) %>% as.formula()
        # CARE values for subsetting
        if (is.null(care_sub)) {
          care_val <- sub_dt[,care] %>% unique()
        } else {
          care_val <- care_sub
        }
        # Season values for subsetting
        if (is.null(season_sub)) {
          season_val <- sub_dt[,season] %>% unique()
        } else {
          season_val <- season_sub
        }
        # Run the regression
        est <- felm(
          eqn,
          data = sub_dt[care %in% care_val & season %in% season_val],
          psdef = F, exactDOF = T)
        # Create filename
        name <- paste(
          ifelse(is.null(var_endo), "OLS", to_camel(var_endo)),
          "_",
          paste0("Lag", lag),
          "_",
          paste0(to_camel(fe_space), to_camel(fe_time)),
          to_camel(fe_hh),
          "_",
          ifelse(is.null(care_sub), "All",
            ifelse(care_sub == T, "Care", "Noncare")),
          "_",
          ifelse(is.null(season_sub), "All", to_camel(season_sub)),
          "_",
          sapply(X = var_exog, FUN = to_camel) %>% paste(collapse = "_"),
          sep = "_") %>% str_replace_all("__", "_")
        # Save the results
        full_summary(object = est, name = name)
  }

# Function to run OLS regressions ----------------------------------------------
  ols_run <- function(var_lhs, price_exog, other_exog,
    fe_hh, fe_space, fe_time, var_cl, lag, care_sub, season_sub) {
      # Function that replaces underscored names with camel case
      # Also: remove math
      to_camel <- . %>% str_replace_all("_", " ") %>%
        str_to_title() %>%
        str_replace_all(" ", "") %>%
        str_replace("[I][(]", "") %>%
        str_replace_all("[()/]", "")
      # Create the lag suffix
      l <- ifelse(lag > 0, paste0("_lag", lag), "")
      # Add lags to endogenous variables IVs
      if (!is.null(price_exog)) {
        price_exog_l <- ifelse(lag <= 0,
          yes = price_exog,
          no = ifelse(str_sub(price_exog, -1) != ")",
            yes = paste0(price_exog, l),
            no = paste0(str_sub(price_exog, 1, -2), l, ")")))
      }
      # Create the regression formula
      eqn <- paste0(
        var_lhs, " ~ ",
        # (Assumed) Exogenous price with appropriate lag
        ifelse(!is.null(price_exog), paste0(price_exog_l, " + "), ""),
        # Other "exogenous" variables
        paste(other_exog, collapse = " + "), " | ",
        # Fixed effects
        fe_hh, " + ",
        ifelse(fe_space == "", fe_time, paste0(fe_space, "_", fe_time)), " | ",
        # No instruments
        " 0 | ",
        # Clustering
        paste(var_cl, collapse = " + ")
        ) %>% as.formula()
        # CARE values for subsetting
        if (is.null(care_sub)) {
          care_val <- sub_dt[,care] %>% unique()
        } else {
          care_val <- care_sub
        }
        # Season values for subsetting
        if (is.null(season_sub)) {
          season_val <- sub_dt[,season] %>% unique()
        } else {
          season_val <- season_sub
        }
        # Run the regression
        est <- felm(
          eqn,
          data = sub_dt[care %in% care_val & season %in% season_val],
          psdef = F, exactDOF = T)
        # Create filename
        name <- paste(
          "OLS",
          "_",
          to_camel(var_lhs),
          "_",
          sapply(X = c(price_exog, other_exog), FUN = to_camel) %>%
            paste(collapse = "_"),
          "_",
          paste0("Lag", lag),
          "_",
          paste0(to_camel(fe_space), to_camel(fe_time)),
          to_camel(fe_hh),
          "_",
          ifelse(is.null(care_sub), "All",
            ifelse(care_sub == T, "Care", "Noncare")),
          "_",
          ifelse(is.null(season_sub), "All", to_camel(season_sub)),
          sep = "_") %>% str_replace_all("__", "_")
        # Save the results
        full_summary(object = est, name = name)
  }

# Load the common zip code panel -----------------------------------------------
  # Load the common zip code data
  bill_dt <- readRDS(paste0(dir_sample, "joint05Pct2010to2014.rds"))
  # Create lags of flag_thm and flag_rev
  setorder(bill_dt, hh_id, begin_date)
  bill_dt[, `:=`(
    flag_rev_lag1  = shift(flag_rev,  1L, fill = NA, type = "lag"),
    flag_thm_lag1  = shift(flag_thm,  1L, fill = NA, type = "lag"),
    flag_rev_lag2  = shift(flag_rev,  2L, fill = NA, type = "lag"),
    flag_thm_lag2  = shift(flag_thm,  2L, fill = NA, type = "lag"),
    flag_rev_lag3  = shift(flag_rev,  3L, fill = NA, type = "lag"),
    flag_thm_lag3  = shift(flag_thm,  3L, fill = NA, type = "lag"),
    # flag_rev_lag12 = shift(flag_rev, 12L, fill = NA, type = "lag"),
    # flag_thm_lag12 = shift(flag_thm, 12L, fill = NA, type = "lag"),
    # flag_rev_lag13 = shift(flag_rev, 13L, fill = NA, type = "lag"),
    # flag_thm_lag13 = shift(flag_thm, 13L, fill = NA, type = "lag"),
    # flag_rev_lag14 = shift(flag_rev, 13L, fill = NA, type = "lag"),
    # flag_thm_lag14 = shift(flag_thm, 13L, fill = NA, type = "lag"),
    TMP = numeric()
  ), by = hh_id][, TMP := NULL]
  # Drop observations flagged due to problems re-calculating bills
# NOTE: I flagged bills with absolute percent error of 5% or more.
  bill_dt <- bill_dt[
    thm > 0 & flag_thm == 0 & bill_flag == F & flag_rev == 0 &
    thm_lag1 > 0 & flag_thm_lag1 == 0 & bill_flag_lag1 == F & flag_rev_lag1 == 0 &
    thm_lag2 > 0 & flag_thm_lag2 == 0 & bill_flag_lag2 == F & flag_rev_lag2 == 0 &
    thm_lag3 > 0 & flag_thm_lag3 == 0 & bill_flag_lag3 == F & flag_rev_lag3 == 0 &
# NOTE: Uncomment for differencing specification
    # thm_lag12 > 0 & flag_thm_lag12 == 0 & bill_flag_lag12 == F & flag_rev_lag12 == 0 &
    # thm_lag13 > 0 & flag_thm_lag13 == 0 & bill_flag_lag13 == F & flag_rev_lag13 == 0 &
    # thm_lag14 > 0 & flag_thm_lag14 == 0 & bill_flag_lag14 == F & flag_rev_lag14 == 0 &
    T]

# Add exceeded summaries -------------------------------------------------------
  # Calculate whether a bill exceeds the allowance
  bill_dt[, exceeded := thm > allowance]
  # Percentage of times a bill exceeds the allowance within a HH
  bill_dt[, pct_exceeded := mean(exceeded), by = hh_id]
  # Summaries by HH
  ex_dt <- bill_dt[, .(hh_id, pct_exceeded)] %>% unique()
  # Save ex_dt
  saveRDS(
    object = ex_dt,
    file = paste0(dir_results, "exceededStatsJointCA.rds"))

# Rename dataset ---------------------------------------------------------------
  # Rename the dataset so that it matches the functions
  sub_dt <- bill_dt
  rm(bill_dt); gc()

# OLS Regressions: Marginal price, month FEs -----------------------------------
  lapply(X = 0:2, FUN = function(x) {
    ols_run(
      var_lhs    = "log(thm_day)",
      price_exog = "log(price_mrg)",
      other_exog = "I(bill_hdd/days)",
      fe_hh      = "hh_id",
      fe_space   = "",
      fe_time    = "ym",
      var_cl     = c("hh_id", "price_cluster"),
      lag        = x,
      care_sub   = NULL,
      season_sub = NULL)
    })

# OLS Regressions: Baseline price, month FEs -----------------------------------
  lapply(X = 0:2, FUN = function(x) {
    ols_run(
      var_lhs    = "log(thm_day)",
      price_exog = "log(price_base)",
      other_exog = "I(bill_hdd/days)",
      fe_hh      = "hh_id",
      fe_space   = "",
      fe_time    = "ym",
      var_cl     = c("hh_id", "price_cluster"),
      lag        = x,
      care_sub   = NULL,
      season_sub = NULL)
    })

# OLS Regressions: Marginal price, city-month FEs ------------------------------
  lapply(X = 0:2, FUN = function(x) {
    ols_run(
      var_lhs    = "log(thm_day)",
      price_exog = "log(price_mrg)",
      other_exog = "I(bill_hdd/days)",
      fe_hh      = "hh_id",
      fe_space   = "city",
      fe_time    = "ym",
      var_cl     = c("hh_id", "price_cluster"),
      lag        = x,
      care_sub   = NULL,
      season_sub = NULL)
    })

# OLS Regressions: Baseline price, city-month FEs ------------------------------
  lapply(X = 0:2, FUN = function(x) {
    ols_run(
      var_lhs    = "log(thm_day)",
      price_exog = "log(price_base)",
      other_exog = "I(bill_hdd/days)",
      fe_hh      = "hh_id",
      fe_space   = "city",
      fe_time    = "ym",
      var_cl     = c("hh_id", "price_cluster"),
      lag        = x,
      care_sub   = NULL,
      season_sub = NULL)
    })

# # Regressions: Levels, marginal price, 0 lags, city-month ----------------------
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_ym + hh_id |
#     (log(price_mrg) ~
#       spot_price_1week + spot_price_1week:utility) |
#     hh_id + price_cluster,
#     data = bill_dt, psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_month_lag0")
#
# # Regressions: Levels, marginal price, 1 lag, city-month -----------------------
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_ym + hh_id |
#     (log(price_mrg_lag1) ~
#       spot_price_1week_lag1 + spot_price_1week_lag1:utility) |
#     hh_id + price_cluster,
#     data = bill_dt, psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_month_lag1")
#
# # Regressions: Levels, marginal price, 2 lags, city-month ----------------------
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_ym + hh_id |
#     (log(price_mrg_lag2) ~
#       spot_price_1week_lag2 + spot_price_1week_lag2:utility) |
#     hh_id + price_cluster,
#     data = bill_dt, psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_month_lag2")
#
# # Regressions: Levels, marginal price, 3 lags, city-month ----------------------
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_ym + hh_id |
#     (log(price_mrg_lag3) ~
#       spot_price_1week_lag3 + spot_price_1week_lag3:utility) |
#     hh_id + price_cluster,
#     data = bill_dt, psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_month_lag3")
#
# # Regressions: Levels, marginal price, 0 lags, city-week -----------------------
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_yw + hh_id |
#     (log(price_mrg) ~
#       spot_price_1week + spot_price_1week:utility) |
#     hh_id + price_cluster,
#     data = bill_dt, psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_week_lag0")
#
# # Regressions: Levels, marginal price, 1 lag, city-week ------------------------
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_yw + hh_id |
#     (log(price_mrg_lag1) ~
#       spot_price_1week_lag1 + spot_price_1week_lag1:utility) |
#     hh_id + price_cluster,
#     data = bill_dt, psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_week_lag1")
#
# # Regressions: Levels, marginal price, 2 lags, city-week -----------------------
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_yw + hh_id |
#     (log(price_mrg_lag2) ~
#       spot_price_1week_lag2 + spot_price_1week_lag2:utility) |
#     hh_id + price_cluster,
#     data = bill_dt, psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_week_lag2")
#
# # Regressions: Levels, marginal price, 3 lags, city-week -----------------------
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_yw + hh_id |
#     (log(price_mrg_lag3) ~
#       spot_price_1week_lag3 + spot_price_1week_lag3:utility) |
#     hh_id + price_cluster,
#     data = bill_dt, psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_week_lag3")
#
# # Seasonal heterogeneity: Levels, mrg. price, city, month, 2 lags --------------
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_ym + hh_id |
#     (log(price_mrg_lag2) ~
#       spot_price_1week_lag2 + spot_price_1week_lag2:utility) |
#     hh_id + price_cluster,
#     data = bill_dt[season == "summer"], psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_month_lag2_summer")
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_ym + hh_id |
#     (log(price_mrg_lag2) ~
#       spot_price_1week_lag2 + spot_price_1week_lag2:utility) |
#     hh_id + price_cluster,
#     data = bill_dt[season == "winter"], psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_month_lag2_winter")
#
# # Seasonal heterogeneity: Levels, mrg. price, city, week, 2 lags ---------------
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_yw + hh_id |
#     (log(price_mrg_lag2) ~
#       spot_price_1week_lag2 + spot_price_1week_lag2:utility) |
#     hh_id + price_cluster,
#     data = bill_dt[season == "summer"], psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_week_lag2_summer")
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_yw + hh_id |
#     (log(price_mrg_lag2) ~
#       spot_price_1week_lag2 + spot_price_1week_lag2:utility) |
#     hh_id + price_cluster,
#     data = bill_dt[season == "winter"], psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_week_lag2_winter")
#
# # Income heterogeneity: Levels, mrg. price, city, month, 2 lags ----------------
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_ym + hh_id |
#     (log(price_mrg_lag2) ~
#       spot_price_1week_lag2 + spot_price_1week_lag2:utility) |
#     hh_id + price_cluster,
#     data = bill_dt[care == T], psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_month_lag2_care")
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_ym + hh_id |
#     (log(price_mrg_lag2) ~
#       spot_price_1week_lag2 + spot_price_1week_lag2:utility) |
#     hh_id + price_cluster,
#     data = bill_dt[care == F], psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_month_lag2_noncare")
#
# # Income heterogeneity: Levels, mrg. price, city, week, 2 lags -----------------
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_yw + hh_id |
#     (log(price_mrg_lag2) ~
#       spot_price_1week_lag2 + spot_price_1week_lag2:utility) |
#     hh_id + price_cluster,
#     data = bill_dt[care == T], psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_week_lag2_care")
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_yw + hh_id |
#     (log(price_mrg_lag2) ~
#       spot_price_1week_lag2 + spot_price_1week_lag2:utility) |
#     hh_id + price_cluster,
#     data = bill_dt[care == F], psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_week_lag2_noncare")
#
# # Two-way heterogeneity: Levels, mrg. price, city, month, 2 lags ---------------
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_ym + hh_id |
#     (log(price_mrg_lag2) ~
#       spot_price_1week_lag2 + spot_price_1week_lag2:utility) |
#     hh_id + price_cluster,
#     data = bill_dt[care == T & season == "summer"], psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_month_lag2_summer_care")
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_ym + hh_id |
#     (log(price_mrg_lag2) ~
#       spot_price_1week_lag2 + spot_price_1week_lag2:utility) |
#     hh_id + price_cluster,
#     data = bill_dt[care == F & season == "summer"], psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_month_lag2_summer_noncare")
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_ym + hh_id |
#     (log(price_mrg_lag2) ~
#       spot_price_1week_lag2 + spot_price_1week_lag2:utility) |
#     hh_id + price_cluster,
#     data = bill_dt[care == T & season == "winter"], psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_month_lag2_winter_care")
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_ym + hh_id |
#     (log(price_mrg_lag2) ~
#       spot_price_1week_lag2 + spot_price_1week_lag2:utility) |
#     hh_id + price_cluster,
#     data = bill_dt[care == F & season == "winter"], psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_month_lag2_winter_noncare")
#
# # Two-way heterogeneity: Levels, mrg. price, city, week, 2 lags ----------------
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_yw + hh_id |
#     (log(price_mrg_lag2) ~
#       spot_price_1week_lag2 + spot_price_1week_lag2:utility) |
#     hh_id + price_cluster,
#     data = bill_dt[care == T & season == "summer"], psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_week_lag2_summer_care")
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_yw + hh_id |
#     (log(price_mrg_lag2) ~
#       spot_price_1week_lag2 + spot_price_1week_lag2:utility) |
#     hh_id + price_cluster,
#     data = bill_dt[care == F & season == "summer"], psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_week_lag2_summer_noncare")
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_yw + hh_id |
#     (log(price_mrg_lag2) ~
#       spot_price_1week_lag2 + spot_price_1week_lag2:utility) |
#     hh_id + price_cluster,
#     data = bill_dt[care == T & season == "winter"], psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_week_lag2_winter_care")
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     city_yw + hh_id |
#     (log(price_mrg_lag2) ~
#       spot_price_1week_lag2 + spot_price_1week_lag2:utility) |
#     hh_id + price_cluster,
#     data = bill_dt[care == F & season == "winter"], psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_city_week_lag2_winter_noncare")
#
# # Regressions: Levels, marginal price, 0 lags, zip-month -----------------------
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     zip_ym + hh_id |
#     (log(price_mrg) ~
#       spot_price_1week + spot_price_1week:utility) |
#     hh_id + price_cluster,
#     data = bill_dt, psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_zip_month_lag0")
#
# # Regressions: Levels, marginal price, 1 lag, zip-month ------------------------
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     zip_ym + hh_id |
#     (log(price_mrg_lag1) ~
#       spot_price_1week_lag1 + spot_price_1week_lag1:utility) |
#     hh_id + price_cluster,
#     data = bill_dt, psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_zip_month_lag1")
#
# # Regressions: Levels, marginal price, 2 lags, zip-month -----------------------
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     zip_ym + hh_id |
#     (log(price_mrg_lag2) ~
#       spot_price_1week_lag2 + spot_price_1week_lag2:utility) |
#     hh_id + price_cluster,
#     data = bill_dt, psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_zip_month_lag2")
#
# # Regressions: Levels, marginal price, 3 lags, zip-month -----------------------
#   felm(
#     log(thm_day) ~
#     bill_hdd |
#     zip_ym + hh_id |
#     (log(price_mrg_lag3) ~
#       spot_price_1week_lag3 + spot_price_1week_lag3:utility) |
#     hh_id + price_cluster,
#     data = bill_dt, psdef = F, exactDOF = T) %>%
#     full_summary("lvl_mrg_zip_month_lag3")
#
# # NOTE: Build a table of results:
#
# # Setup ------------------------------------------------------------------------
#   # Options
#   options(stringsAsFactors = FALSE)
#   # Packages
#   library(pacman)
#   p_load(data.table, stringr, lubridate, lfe, magrittr)
#   # Set the number of threads for the lfe package
#   # options(lfe.threads = 32)
#   # Directories of joined bill-weather files
#   dir_project <- "S:/Edward/Elasticity/"
#   # dir_project <- "/Users/edwardarubin/Dropbox/Research/MyProjects/NaturalGas/"
#   dir_csv     <- paste0(dir_project, "DataCsv/")
#   dir_rds     <- paste0(dir_project, "DataR/")
#   dir_figures <- paste0(dir_project, "Figures/")
#   dir_output  <- paste0(dir_project, "Output/")
#   dir_results <- paste0(dir_rds, "Results/Results20171017/")
#
# # Load results -----------------------------------------------------------------
#   # Find all files in the results directory
#   files_v <- dir_results %>% dir() %>% str_subset("jointCA_") %>%
#     grep("full", ., value = T, invert = T)
#   # Load the results
#   result_dt <- lapply(X = files_v, FUN = function(x) {
#     tmp <- x %>% paste0(dir_results, .) %>% readRDS() %>% extract2(2) %$% coef
#     tmp_dt <- data.table(tmp)[grep("fit", row.names(tmp)),]
#     tmp_dt[, name := str_replace(x, ".rds", "")]
#     return(tmp_dt)
#     }) %>% rbindlist()
#   setnames(result_dt, c("Estimate", "S.E.", "t Stat.", "p Value", "name"))
#   # Add columns for more descriptions
#   result_dt[, c("Price", "Spatial FE", "Temporal FE", "Lag") :=
#     tstrsplit(name, "_", fixed = T, keep = 3L:6L)]
#   # Update 'Lag' to only numeric
#   result_dt[, Lag := str_replace(Lag, "lag", "")]
#   # Update 'Price'
#   result_dt[Price == "mrg", Price := "Marginal"]
#   # Capitalize FEs
#   result_dt[, "Spatial FE" := str_to_title(`Spatial FE`)]
#   result_dt[, "Temporal FE" := str_to_title(`Temporal FE`)]
#   # Care status
#   result_dt[, Income := ""]
#   result_dt[grepl("care", name), Income := "CARE"]
#   result_dt[grepl("noncare", name), Income := "Non-CARE"]
#   # Season
#   result_dt[, Season := ""]
#   result_dt[grepl("summer", name), Season := "Summer"]
#   result_dt[grepl("winter", name), Season := "Winter"]
#   # Add indicator for 'full dataset'
#   result_dt[, Dataset := "Shared zip codes"]
#   result_dt[grepl("full", name), Dataset := "Shared cities"]
#   # Drop 'name'
#   result_dt[, name := NULL]
#   # Create heterogeneity groups
#   result_dt[, grp := 1 * (Income == "") + 2 * (Season == "")]
#   setorder(result_dt, -"Dataset", -"grp", "Price", "Spatial FE", "Temporal FE", "Lag", "Income", "Season")
#   result_dt[, grp := NULL]
#   # Round
#   result_dt[, Estimate := round(Estimate, 3)]
#   result_dt[, "S.E." := round(result_dt[,"S.E."], 3)]
#   result_dt[, "t Stat." := round(result_dt[,"t Stat."], 3)]
#   result_dt[, "p Value" := round(result_dt[,"p Value"], 3)]
#   # Drop 'Dataset' (irrelevant)
#   result_dt[, Dataset := NULL]
#   # Format for tex
#   sp <- "&"
#   result_dt <- cbind(
#     result_dt[,1],  sp,
#     result_dt[,2],  sp,
#     result_dt[,3],  sp,
#     result_dt[,4],  sp,
#     result_dt[,5],  sp,
#     result_dt[,6],  sp,
#     result_dt[,7],  sp,
#     result_dt[,8],  sp,
#     result_dt[,9],  sp,
#     result_dt[,10], end = "\\\\")
#   names(result_dt)[names(result_dt) == "sp"] <- ""
#   names(result_dt)[ncol(result_dt)] <- ""
#   # Print
#   result_dt %>% knitr::kable("pandoc")

# Summary statistics -----------------------------------------------------------
  # Desired variables
  var_dt <- sub_dt[, .(price_base, price_avg, price_mrg, thm, days, thm_day,
    total_bill, care, utility, season)]
  # Overall summaries
  all_mean <- var_dt[, -c("season", "utility")][,
    lapply(X = .SD, FUN = function(x) x %>% mean() %>% round(4))]
  all_sd <- var_dt[, -c("season", "utility")][,
    lapply(X = .SD, FUN = function(x) x %>% sd() %>% round(4))]
  # Seasonal summaries
  var_dt[,-"utility"][, lapply(X = .SD, FUN = mean), by = season]
  var_dt[,-"utility"][, lapply(X = .SD, FUN = sd), by = season]
  # CARE summaries
  var_dt[, lapply(X = .SD, FUN = mean), by = care]
  var_dt[, lapply(X = .SD, FUN = sd), by = care]
  # Utility summaries
  util_mean <- var_dt[,-"season"][,
    lapply(X = .SD, FUN = function(x) x %>% mean() %>% round(4)), by = utility]
  util_sd <- var_dt[,-"season"][,
    lapply(X = .SD, FUN = function(x) x %>% sd() %>% round(4)), by = utility]
